## data analysis '53 paper 
## models with 1946 elections results table
## use simplified approach from Li et al. (1991) to compute p-values for Wald tests
## see also Schaefer 1997: 115-16

rm(list = ls())

library(readxl)
library(multiwayvcov)
library(lmtest)
library(Hmisc)

setwd("~/Dropbox/Current projects/1953/Replication archive")


## choose signal strength measure to employ
signal <- "lrm_4"
jamming <- ""
adjust <- ""

load(paste("1953data", signal, adjust, jamming, ".RData", sep = ""))


## rescale distance to East Berlin so that coefficients are not too small
dat$distance_to_berlin <- dat$distance_to_berlin/100


table1 <- matrix(NA,41,4)
rownames(table1) <- c(	"intercept",
						"county capital",
						"KVP base",
						"$%$ agriculture and forestry",
						"$%$ mining",
						"$%$ metalworking industry",
						"$%$ chemical industry",
						"$%$ woods and plastics",
						"$%$ consumer goods",
						"$%$ construction",
						"$%$ transportation",
						"$%$ commerce",
						"$%$ services",
						"log(population size)",
						"$%$ population male",
						"population density",
						"$%$ population change 1939--50",
						"$%$ population in same {\\it Land}",
						"$%$ 8 years of education",
						"$%$ 10 years of education",
						"$%$ 12 years of education",
						"$%$ more than 12 years of education",
						"$%$ trade school degree",
						"$%$ university degree",
						"$%$ protestant",
						"$%$ catholic",
						"$%$ agnostic",
						"living space per capita (m$^2$)", 
						"$%$ turnout '46",
						"$%$ invalid votes '46",
						"$%$ LDP '46",
						"$%$ CDU '46",
						"RIAS signal strength",
						"RIAS signal strength$^2$",
						"RIAS signal strength$^3$",
						"distance to East Berlin (100 km)",
						"distance to East Berlin$^2$ (100 km)",
						"distance to East Berlin$^3$ (100 km)",
						"Wald test RIAS $p$-value",
						"Wald test distance $p$-value",
						"district fixed effects")

table1[41,c(1,3)] <- c("No","Yes")



######################################
## probit model 1 --- no fixed effects
######################################
source("hl2.R")	## uses t-distribution with estimated degrees of freedom for MI

probit.out.m1 <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout.m1 +
							invalid.m1 +
							LDP.m1 +
							CDU.m1 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out.m1)


## clustered standard errors
vcov.cl.m1 <- cluster.vcov(probit.out.m1, dat$county)
temp.m1 <- coeftest(probit.out.m1, vcov.cl.m1)



probit.out.m2 <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout.m2 +
							invalid.m2 +
							LDP.m2 +
							CDU.m2 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out.m2)


## clustered standard errors
vcov.cl.m2 <- cluster.vcov(probit.out.m2, dat$county)
temp.m2 <- coeftest(probit.out.m2, vcov.cl.m2)



probit.out.m3 <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout.m3 +
							invalid.m3 +
							LDP.m3 +
							CDU.m3 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out.m3)


## clustered standard errors
vcov.cl.m3 <- cluster.vcov(probit.out.m3, dat$county)
temp.m3 <- coeftest(probit.out.m3, vcov.cl.m3)



probit.out.m4 <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout.m4 +
							invalid.m4 +
							LDP.m4 +
							CDU.m4 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out.m4)


## clustered standard errors
vcov.cl.m4 <- cluster.vcov(probit.out.m4, dat$county)
temp.m4 <- coeftest(probit.out.m4, vcov.cl.m4)



probit.out.m5 <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout.m5 +
							invalid.m5 +
							LDP.m5 +
							CDU.m5 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out.m5)


## clustered standard errors
vcov.cl.m5 <- cluster.vcov(probit.out.m5, dat$county)
temp.m5 <- coeftest(probit.out.m5, vcov.cl.m5)


####################################
## combine estimates and compute SEs
####################################
est.mi <- cbind(	temp.m1[,1],
					temp.m2[,1],
					temp.m3[,1],
					temp.m4[,1],
					temp.m5[,1]
					)

se.mi <- cbind(		temp.m1[,2],
					temp.m2[,2],
					temp.m3[,2],
					temp.m4[,2],
					temp.m5[,2]
					)

## using Rubin's notation (2002: 87)
theta.bar <- apply(est.mi,1,mean)
W.bar <- apply(se.mi,1,mean)
B <- (est.mi - replicate(5,theta.bar))^2
B <- apply(B,1,sum)/4
Tot <- W.bar + B*6/5

v <- 4 * (1 + 1/6*W.bar/B)^2

table1[1:38,1:2] <- hl2(cbind(theta.bar, sqrt(Tot)), v = v)[1:38,]


## Wald test RIAS
sel <- 33:35

w1 <- probit.out.m1$coef[sel] %*% solve(vcov.cl.m1[sel,sel]) %*% probit.out.m1$coef[sel]
w2 <- probit.out.m2$coef[sel] %*% solve(vcov.cl.m2[sel,sel]) %*% probit.out.m2$coef[sel]
w3 <- probit.out.m3$coef[sel] %*% solve(vcov.cl.m3[sel,sel]) %*% probit.out.m3$coef[sel]
w4 <- probit.out.m4$coef[sel] %*% solve(vcov.cl.m4[sel,sel]) %*% probit.out.m4$coef[sel]
w5 <- probit.out.m5$coef[sel] %*% solve(vcov.cl.m5[sel,sel]) %*% probit.out.m5$coef[sel]

w.mean <- mean(c(w1,w2,w3,w4,w5))
w.mean.sq <- mean(c(sqrt(w1),sqrt(w2),sqrt(w3),sqrt(w4),sqrt(w5)))

temp <- sum(c(
			(sqrt(w1) - w.mean.sq)^2,
			(sqrt(w2) - w.mean.sq)^2,
			(sqrt(w3) - w.mean.sq)^2,
			(sqrt(w4) - w.mean.sq)^2,
			(sqrt(w5) - w.mean.sq)^2
			))

r <- 1.2 * 1/4 * temp
W <- (w.mean/3 - 6/4*r)/(1+r)
v <- 3^(-3/5) * 4 * (1+1/r)^2
table1[39,1] <- round(1 - pf(W,3,v),3)


## Wald test distance
sel <- 36:38

w1 <- probit.out.m1$coef[sel] %*% solve(vcov.cl.m1[sel,sel]) %*% probit.out.m1$coef[sel]
w2 <- probit.out.m2$coef[sel] %*% solve(vcov.cl.m2[sel,sel]) %*% probit.out.m2$coef[sel]
w3 <- probit.out.m3$coef[sel] %*% solve(vcov.cl.m3[sel,sel]) %*% probit.out.m3$coef[sel]
w4 <- probit.out.m4$coef[sel] %*% solve(vcov.cl.m4[sel,sel]) %*% probit.out.m4$coef[sel]
w5 <- probit.out.m5$coef[sel] %*% solve(vcov.cl.m5[sel,sel]) %*% probit.out.m5$coef[sel]

w.mean <- mean(c(w1,w2,w3,w4,w5))
w.mean.sq <- mean(c(sqrt(w1),sqrt(w2),sqrt(w3),sqrt(w4),sqrt(w5)))

temp <- sum(c(
			(sqrt(w1) - w.mean.sq)^2,
			(sqrt(w2) - w.mean.sq)^2,
			(sqrt(w3) - w.mean.sq)^2,
			(sqrt(w4) - w.mean.sq)^2,
			(sqrt(w5) - w.mean.sq)^2
			))

r <- 1.2 * 1/4 * temp
W <- (w.mean/3 - 6/4*r)/(1+r)
v <- 3^(-3/5) * 4 * (1+1/r)^2
table1[40,1] <- round(1 - pf(W,3,v),3)



###################################
## probit model 2 --- fixed effects
###################################
probit.out.m1 <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout.m1 +
							invalid.m1 +
							LDP.m1 +
							CDU.m1 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out.m1)


## clustered standard errors
vcov.cl.m1 <- cluster.vcov(probit.out.m1, dat$county)
temp.m1 <- coeftest(probit.out.m1, vcov.cl.m1)



probit.out.m2 <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout.m2 +
							invalid.m2 +
							LDP.m2 +
							CDU.m2 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out.m2)


## clustered standard errors
vcov.cl.m2 <- cluster.vcov(probit.out.m2, dat$county)
temp.m2 <- coeftest(probit.out.m2, vcov.cl.m2)



probit.out.m3 <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout.m3 +
							invalid.m3 +
							LDP.m3 +
							CDU.m3 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out.m3)


## clustered standard errors
vcov.cl.m3 <- cluster.vcov(probit.out.m3, dat$county)
temp.m3 <- coeftest(probit.out.m3, vcov.cl.m3)



probit.out.m4 <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout.m4 +
							invalid.m4 +
							LDP.m4 +
							CDU.m4 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out.m4)


## clustered standard errors
vcov.cl.m4 <- cluster.vcov(probit.out.m4, dat$county)
temp.m4 <- coeftest(probit.out.m4, vcov.cl.m4)



probit.out.m5 <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout.m5 +
							invalid.m5 +
							LDP.m5 +
							CDU.m5 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out.m5)


## clustered standard errors
vcov.cl.m5 <- cluster.vcov(probit.out.m5, dat$county)
temp.m5 <- coeftest(probit.out.m5, vcov.cl.m5)



####################################
## combine estimates and compute SEs
####################################
est.mi <- cbind(	temp.m1[,1],
					temp.m2[,1],
					temp.m3[,1],
					temp.m4[,1],
					temp.m5[,1]
					)

se.mi <- cbind(		temp.m1[,2],
					temp.m2[,2],
					temp.m3[,2],
					temp.m4[,2],
					temp.m5[,2]
					)

## using Rubin's notation (2002: 87)
theta.bar <- apply(est.mi,1,mean)
W.bar <- apply(se.mi,1,mean)
B <- (est.mi - replicate(5,theta.bar))^2
B <- apply(B,1,sum)/4
Tot <- W.bar + B*6/5

v <- 4 * (1 + 1/6*W.bar/B)^2

table1[1:38,3:4] <- hl2(cbind(theta.bar, sqrt(Tot)), v = v)[1:38,]


## Wald test RIAS
sel <- 33:35

w1 <- probit.out.m1$coef[sel] %*% solve(vcov.cl.m1[sel,sel]) %*% probit.out.m1$coef[sel]
w2 <- probit.out.m2$coef[sel] %*% solve(vcov.cl.m2[sel,sel]) %*% probit.out.m2$coef[sel]
w3 <- probit.out.m3$coef[sel] %*% solve(vcov.cl.m3[sel,sel]) %*% probit.out.m3$coef[sel]
w4 <- probit.out.m4$coef[sel] %*% solve(vcov.cl.m4[sel,sel]) %*% probit.out.m4$coef[sel]
w5 <- probit.out.m5$coef[sel] %*% solve(vcov.cl.m5[sel,sel]) %*% probit.out.m5$coef[sel]

w.mean <- mean(c(w1,w2,w3,w4,w5))
w.mean.sq <- mean(c(sqrt(w1),sqrt(w2),sqrt(w3),sqrt(w4),sqrt(w5)))

temp <- sum(c(
			(sqrt(w1) - w.mean.sq)^2,
			(sqrt(w2) - w.mean.sq)^2,
			(sqrt(w3) - w.mean.sq)^2,
			(sqrt(w4) - w.mean.sq)^2,
			(sqrt(w5) - w.mean.sq)^2
			))

r <- 1.2 * 1/4 * temp
W <- (w.mean/3 - 6/4*r)/(1+r)
v <- 3^(-3/5) * 4 * (1+1/r)^2
table1[39,3] <- round(1 - pf(W,3,v),3)


## Wald test distance
sel <- 36:38

w1 <- probit.out.m1$coef[sel] %*% solve(vcov.cl.m1[sel,sel]) %*% probit.out.m1$coef[sel]
w2 <- probit.out.m2$coef[sel] %*% solve(vcov.cl.m2[sel,sel]) %*% probit.out.m2$coef[sel]
w3 <- probit.out.m3$coef[sel] %*% solve(vcov.cl.m3[sel,sel]) %*% probit.out.m3$coef[sel]
w4 <- probit.out.m4$coef[sel] %*% solve(vcov.cl.m4[sel,sel]) %*% probit.out.m4$coef[sel]
w5 <- probit.out.m5$coef[sel] %*% solve(vcov.cl.m5[sel,sel]) %*% probit.out.m5$coef[sel]

w.mean <- mean(c(w1,w2,w3,w4,w5))
w.mean.sq <- mean(c(sqrt(w1),sqrt(w2),sqrt(w3),sqrt(w4),sqrt(w5)))

temp <- sum(c(
			(sqrt(w1) - w.mean.sq)^2,
			(sqrt(w2) - w.mean.sq)^2,
			(sqrt(w3) - w.mean.sq)^2,
			(sqrt(w4) - w.mean.sq)^2,
			(sqrt(w5) - w.mean.sq)^2
			))

r <- 1.2 * 1/4 * temp
W <- (w.mean/3 - 6/4*r)/(1+r)
v <- 3^(-3/5) * 4 * (1+1/r)^2
table1[40,3] <- round(1 - pf(W,3,v),3)


latex(table1, file = "")



